import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Search-result page listing the free resume templates to download.
LIST_URL = 'https://aspx.sc.chinaz.com/query.aspx?keyword=免费&classID=864'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
}
DOWNLOAD_DIR = './jianli'
# requests applies no timeout by default, so a stalled server would block forever.
REQUEST_TIMEOUT = 30
# Characters that are path separators or are rejected in file names on Windows.
_INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def _fetch(session, url):
    """Send a GET request for ``url`` and return the response.

    Raises ``requests.RequestException`` on connection problems, timeouts and
    non-2xx status codes, so an error page is never parsed or saved as a file.
    """
    response = session.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def _fetch_html(session, url):
    """Download ``url`` and return it parsed as an lxml element tree.

    Raises ``ValueError`` when the response contains no parsable HTML.
    """
    response = _fetch(session, url)
    # Without a declared charset requests falls back to ISO-8859-1 for text
    # responses, which garbles Chinese text; detect the encoding from the body.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding
    tree = etree.HTML(response.text)
    if tree is None:
        raise ValueError('no parsable HTML at ' + url)
    return tree


def _safe_filename(name):
    """Return ``name`` stripped and with characters invalid in file names replaced by ``_``."""
    cleaned = ''.join(
        '_' if ch in _INVALID_FILENAME_CHARS else ch for ch in name.strip()
    )
    return cleaned or 'unnamed'


def _download_resume(session, detail_url, jianli_name):
    """Follow a resume detail page to its archive and save it under ``DOWNLOAD_DIR``.

    Returns ``True`` when the archive was written and ``False`` when the detail
    page exposes no download link.
    """
    # Open the free resume's detail page to find the actual download link.
    detail_tree = _fetch_html(session, detail_url)
    load_hrefs = detail_tree.xpath(
        '//div[@class="down_wrap"]/div[2]/ul/li[1]/a/@href'
    )
    if not load_hrefs:
        return False
    # urljoin leaves absolute links untouched and resolves relative ones.
    jianli_load_url = urljoin(detail_url, load_hrefs[0])
    print(jianli_load_url)
    jianli_data = _fetch(session, jianli_load_url).content
    jianli_download_path = os.path.join(
        DOWNLOAD_DIR, _safe_filename(jianli_name) + '.rar'
    )
    with open(jianli_download_path, 'wb') as fp:
        fp.write(jianli_data)
    return True


def main():
    """Download every free resume template listed on ``LIST_URL``.

    A failure on a single resume is reported and skipped so the remaining
    downloads still run; a failure to load the listing page itself propagates.
    """
    # A session reuses the underlying connection across the many requests.
    with requests.Session() as session:
        listing_tree = _fetch_html(session, LIST_URL)
        div_list = listing_tree.xpath('//div[@class="sc_warp  mt20"]/div/div/div')

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(DOWNLOAD_DIR, exist_ok=True)

        for div in div_list:
            hrefs = div.xpath('./a/@href')
            names = div.xpath('./p/a/text()')
            # Not every matched div is necessarily a resume entry; skip the
            # ones missing a link or a title instead of raising IndexError.
            if not hrefs or not names:
                continue
            # Listing hrefs are protocol-relative ("//host/path"); urljoin
            # supplies the scheme and also copes with absolute links.
            jianli_url = urljoin(LIST_URL, hrefs[0])
            jianli_name = names[0]
            try:
                saved = _download_resume(session, jianli_url, jianli_name)
            except (requests.RequestException, etree.LxmlError, ValueError, OSError) as exc:
                print(jianli_name + '下载失败：' + str(exc))
                continue
            if saved:
                print(jianli_name + '下载成功！！！')
            else:
                print(jianli_name + '下载失败：未找到下载链接')


if __name__ == '__main__':
    main()